************************************************************************
* Initialize: Cleaning data sets for the concurrent election project *
************************************************************************

* ---- Session setup --------------------------------------------------------
* Reset the Stata session, open a fresh text log, move to the do-file root
* and define the path globals used by every section below.
cls
clear all												// also clears matrices, so no separate -clear matrix- is needed
set more off
capture log close										// close any log left open by an earlier run

log using "C:/Userdata/Shared/Logs/Concurrent elections/initialize.text", text replace

cd "C:/Userdata/Shared/Dofiles/"

* Data directories
global rawdata "D:/SCB_ConPol/Stata"					// Raw data directory
global usingdata "E:/ProjData/Concurrent elections/"	// Using data directory (cleaned files are saved here)

* Do-file directories (relative to the working directory set by -cd- above)
global dodatadf "DoData/Concurrent elections"			// DoData dofiles
global doanalysisdf "DoAnalysis/Concurrent elections"	// Doanalysis dofiles

* Run the separate programs do-file before any data section
do "$dodatadf/programs.do"
*

** RTB 2010: Date of citizenship etc 2010 sample **
use "$rawdata/RTB/RTB_2010", clear

* A very small number of LopNr values appear more than once in the registry
* data. Every observation belonging to such a LopNr is removed (no copy kept).
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0

* Keep only the listed variables; this also discards the helper variable.
keep LopNr Lan Kommun Forsamling Civil ///
	CivDat Medblandnamn MedBdat
compress
save "$usingdata/RTB_2010.dta", replace

** RTB 1998: Date of citizenship 1998 (used for 1994 sample) **
* Must be based on RTB 1998, since citizenship dates for previous (lagged)
* years are only available from that year.
use "$rawdata/RTB/RTB_1998", clear

* Remove every observation whose LopNr is not unique in the file.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0

* This file spells the variable MedBDat; rename it to MedBdat, the spelling
* used when the 2010 and 2006 RTB files are trimmed in this do-file.
rename MedBDat MedBdat

keep LopNr Lan Kommun Forsamling Civil ///
	CivDat Medblandnamn MedBdat
compress
save "$usingdata/RTB_1998.dta", replace

** RTB 1994 **
* For the Oaxaca decomposition analysis. Does not contain date of citizenship.
use "$rawdata/RTB/RTB_1994", clear

* Remove every observation whose LopNr is not unique in the file.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0

* Keep only the listed variables; this also discards the helper variable.
keep LopNr Lan Kommun Forsamling Civil
compress
save "$usingdata/RTB_1994.dta", replace

** RTB 2006 **
* Same trimming as for RTB 2010: unique LopNr only, same set of variables.
use "$rawdata/RTB/RTB_2006", clear

* Remove every observation whose LopNr is not unique in the file.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0

keep LopNr Lan Kommun Forsamling Civil ///
	CivDat Medblandnamn MedBdat
compress
save "$usingdata/RTB_2006.dta", replace


** Date of migration **
* Produces one row per person (LopNr): the record with the smallest Datum
* among the rows that remain once PostTyp == "Utv" rows are removed, plus a
* 0/1 flag im_mult_times marking persons who had more than one such row.
* NOTE(review): "Utv" presumably marks emigration records - confirm against
* the register documentation.
use "$rawdata/RTB/Migrationer", clear
drop if PostTyp=="Utv"

* One stable sort fixes the within-person order explicitly. Without -stable-
* Stata orders tied observations randomly, so for a person with several rows
* on the same Datum the kept row (and hence Landnamn) could change between
* runs. With -stable- ties keep their order from the raw file.
sort LopNr Datum, stable

* _N is the number of remaining rows for the person, so _N > 1 flags exactly
* the persons with multiple records (1 = multiple, 0 = single).
by LopNr: gen byte im_mult_times = _N > 1

* Keep the first row per person (missing Datum sorts last).
by LopNr: keep if _n == 1

keep LopNr Datum im_mult_times Landnamn
compress
save "$usingdata/Migrationer.dta", replace


** Birth data **
use "$rawdata/RTB/Fodelseuppg.dta", clear

* Remove every observation whose LopNr is not unique in the file.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0

* Keep only the listed variables; this also discards the helper variable.
keep LopNr FodArMan UtlSvBakG Fodelselan ///
	fodelseforsnamn Kon F_delselandGrupp
compress
save "$usingdata/Fodelseuppg.dta", replace

** Country of origin (grouped) **
use "$rawdata/RTB/FodelselandIFAUGrupp", clear

* Remove every observation whose LopNr is not unique in the file.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0

* Keep only the listed variables; this also discards the helper variable.
keep LopNr Fodelselandgrp IFAUkod
compress
save "$usingdata/FodelselandIFAUGrupp.dta", replace
									
** LISA-data 2010 **
use "$rawdata/LISA/LISA_2010", clear

* Remove every observation whose LopNr is not unique in the file, then drop
* the two unused variables together with the helper variable.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0
drop AterPNr Senpnr extra_copies

* Append the year suffix _10 to every remaining variable name ...
ds
local allvars `r(varlist)'
foreach v of local allvars {
	rename `v' `v'_10
}
* ... and restore the person identifier to its unsuffixed name.
rename LopNr_10 LopNr

compress
save "$usingdata/LISA_2010.dta", replace

** LISA data 2009 **
use "$rawdata/LISA/LISA_2009", clear

* Remove every observation whose LopNr is not unique in the file, then drop
* the two unused variables together with the helper variable.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0
drop AterPNr Senpnr extra_copies

* Append the year suffix _09 to every remaining variable name ...
ds
local allvars `r(varlist)'
foreach v of local allvars {
	rename `v' `v'_09
}
* ... and restore the person identifier to its unsuffixed name.
rename LopNr_09 LopNr

compress
save "$usingdata/LISA_2009.dta", replace


** LISA-data 1994 **
use "$rawdata/LISA/LISA_1994", clear

* Remove every observation whose LopNr is not unique in the file, then drop
* the two unused variables together with the helper variable.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0
drop AterPNr Senpnr extra_copies

* Append the year suffix _94 to every remaining variable name ...
ds
local allvars `r(varlist)'
foreach v of local allvars {
	rename `v' `v'_94
}
* ... and restore the person identifier to its unsuffixed name.
rename LopNr_94 LopNr

compress
save "$usingdata/LISA_1994.dta", replace

** LISA-data 1993 **
use "$rawdata/LISA/LISA_1993", clear

* Remove every observation whose LopNr is not unique in the file, then drop
* the two unused variables together with the helper variable.
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0
drop AterPNr Senpnr extra_copies

* Append the year suffix _93 to every remaining variable name ...
ds
local allvars `r(varlist)'
foreach v of local allvars {
	rename `v' `v'_93
}
* ... and restore the person identifier to its unsuffixed name.
rename LopNr_93 LopNr

compress
save "$usingdata/LISA_1993.dta", replace


** Voter turnout 2010 **
use "$rawdata/VD/Valdelt_2010", clear

* Remove every observation whose LopNr is not unique in the file, then drop
* the two unused variables together with the helper variable.
* (Note the spelling SenPNr in this file, as opposed to Senpnr in LISA.)
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0
drop AterPNr SenPNr extra_copies

* Append the year suffix _10 to every remaining variable name ...
ds
local allvars `r(varlist)'
foreach v of local allvars {
	rename `v' `v'_10
}
* ... and restore the person identifier to its unsuffixed name.
rename LopNr_10 LopNr

compress
save "$usingdata/Valdelt_2010.dta", replace

** Voter turnout 1994 **
* Read from the raw file Valdelt_1994rkl; saved as Valdelt_1994.dta.
use "$rawdata/VD/Valdelt_1994rkl", clear

* Remove every observation whose LopNr is not unique in the file, then drop
* the helper variable (no other variables are dropped for this file).
duplicates tag LopNr, generate(extra_copies)
keep if extra_copies == 0
drop extra_copies

* Append the year suffix _94 to every remaining variable name ...
ds
local allvars `r(varlist)'
foreach v of local allvars {
	rename `v' `v'_94
}
* ... and restore the person identifier to its unsuffixed name.
rename LopNr_94 LopNr

compress
save "$usingdata/Valdelt_1994.dta", replace

************
* Clean up: empty Stata's memory and close the log.
clear all
* -capture- suppresses the error that -log close- raises when no log is open.
capture log close
